#pragma once
#include <sys/cdefs.h>
#include "esmd_types.h"
#include <tuple>
#ifdef __sw_slave__
#include <qthread_slave.h>
extern double xacos(double);
extern double xatan2(double, double);
extern double xsin(double);
extern double xcos(double);
#define P_RPI 0.3183098861837907
#define P_PI_HI 3.141592653589793
#define P_PI_LO 1.2246467991473532072e-16
static __ldm_fix double sincos_coefs[14] = {
    -2.617524622980400389e-07,  //0
    2.4767780624795465022e-05,  //1
    -0.0013888525605711818031,  //2
    0.041666650704416309525,    //3
    -0.49999999803407579879,    //4
    1,                          //5
    2.6052564198872795625e-06,  //6
    -0.00019809081657093404838, //7
    0.0083330510201351341848,   //8
    -0.16666657982798208093,    //9
    0.99999999572039277584,     //10
    0.3183098861837907,         //11
    3.141592653589793,          //12
    1.2246467991473532072e-16   //13
};
// __thread_local_fix double test = 2;
// #define pcos xcos
// #define psin xsin
INLINE double psin(double x) {
  // return xsin(x);
  double xdivpih;
  double sign;
  asm("fcvtdlr %2, 1, %1\n\t"
      "sll %1, 63, %0\n\t"
      "fcvtld %1, %1\n\t"
      : "=&r"(sign), "=&r"(xdivpih)
      : "r"(x * sincos_coefs[11]));
  xdivpih += 0.5;
  double q = x - xdivpih * sincos_coefs[12] - xdivpih * sincos_coefs[13];
  double q2 = q * q;
  double u = sincos_coefs[0];
  u = u * q2 + sincos_coefs[1];
  u = u * q2 + sincos_coefs[2];
  u = u * q2 + sincos_coefs[3];
  u = u * q2 + sincos_coefs[4];
  u = u * q2 + sincos_coefs[5];
  asm("xor %0, %1, %2\n\t"
      : "=r"(u)
      : "r"(sign), "0"(u));
  return u;
}
INLINE double pcos(double x) {
  // return xcos(x);
  double xdivpih;
  double sign;
  // cal_locked_printf("%f %f\n", sincos_coefs[11], test);
  asm("fcvtdlr %2, 1, %1\n\t"
      "sll %1, 63, %0\n\t"
      "fcpysn %0, %0, %0\n\t"
      "fcvtld %1, %1\n\t"
      // "halt\n\t"
      : "=&r"(sign), "=&r"(xdivpih)
      : "r"(x * sincos_coefs[11]));
  xdivpih += 0.5;
  double q = x - xdivpih * sincos_coefs[12] - xdivpih * sincos_coefs[13];
  double q2 = q * q;
  double u = sincos_coefs[6];
  u = u * q2 + sincos_coefs[7];
  u = u * q2 + sincos_coefs[8];
  u = u * q2 + sincos_coefs[9];
  u = u * q2 + sincos_coefs[10];
  u = u * q;

  asm("xor %0, %1, %2\n\t"
      : "=r"(u)
      : "r"(sign), "0"(u));
  return u;
}

// def pacos(x):
//     if numpy.abs(x) <= 0.5:
//         q = x
//         q2 = x * x
//     else:
//         q2 = (1 - numpy.abs(x)) / 2 #cos^2(pi/2-x/2)
//         q = numpy.sqrt(q2) #cos(pi/2-x/2)
//     u =          -0.04241737375979984237
//     u = u * q2 + -0.02399399965364758941
//     u = u * q2 + -0.045520635631714566338
//     u = u * q2 + -0.074946966874239726031
//     u = u * q2 + -0.16666782005775337971
//     u = u * q2 + -0.99999999598373801035
//     u = u * q
//     # u = u
//     if numpy.abs(x) > 0.5:
//         u = -numpy.sign(x) * (u * 2 + 1.570796326794896558)
//     # else:

//     return u + 1.570796326794896558
static __ldm_fix double acos_coefs[] = {
    -0.04241737375979984237,
    -0.02399399965364758941,
    -0.045520635631714566338,
    -0.074946966874239726031,
    -0.16666782005775337971,
    -0.99999999598373801035,
    1.570796326794896558};
double pacos(double x) {
  double absx = fabs(x);
  double q, q2;
  long sign = x < 0 ? 0L << 63 : 1L << 63;

  if (absx <= 0.5) {
    q = x;
    q2 = x * x;
  } else {
    q2 = (1 - absx) * 0.5;
    q = sqrt(max(0.0, q2));
  }
  double u = acos_coefs[0];
  u = u * q2 + acos_coefs[1];
  u = u * q2 + acos_coefs[2];
  u = u * q2 + acos_coefs[3];
  u = u * q2 + acos_coefs[4];
  u = u * q2 + acos_coefs[5];
  u = u * q;
  if (absx > 0.5) {
    u = u * 2.0 + acos_coefs[6];
    asm("xor %2, %1, %0"
        : "=r"(u)
        : "r"(sign), "0"(u));
  }
  return u + acos_coefs[6];
}
#endif
/* The dihedral between atom 0,1,2 and 1,2,3 */
struct dihedral_geometry {
  vec<real> ffactor[4];
  real sinphi, cosphi, phi;
  template<typename T>
  __always_inline dihedral_geometry(T x) {
    vec<real> r01 = x[0] - x[1];
    vec<real> r21 = x[2] - x[1];
    vec<real> r23 = x[2] - x[3];
    real r21norm = r21.norm();
    real inv_r21norm = 1.0 / r21norm;
    real inv_r21norm2 = inv_r21norm * inv_r21norm;
    vec<real> n = r01.cross(r21);
    vec<real> m = r21.cross(r23);
    real inv_nnorm = 1.0 / n.norm();
    real inv_mnorm = 1.0 / m.norm();
    vec<real> nhat = n * inv_nnorm;
    vec<real> mhat = m * inv_mnorm;
    vec<real> nxm = nhat.cross(mhat);
    real sinsign = -r21.dot(nxm);
    
    cosphi = nhat.dot(mhat);
    sinphi = sinsign < 0 ? -nxm.norm() : nxm.norm();
    ffactor[0] = -r21norm * inv_nnorm * nhat;
    ffactor[3] = r21norm * inv_mnorm * mhat;
    vec<real> svec = -r01.dot(r21) * inv_r21norm2 * ffactor[0] + r23.dot(r21) * inv_r21norm2 * ffactor[3];
    ffactor[1] = -svec - ffactor[0];
    ffactor[2] = svec - ffactor[3];
  }
  __always_inline const real rad() const {
    #ifndef __sw_slave__
    return -atan2(sinphi, cosphi);
    #else
    return (sinphi > 0 ? -1.0:1.0) * pacos(cosphi);
    #endif
  }
};

struct angle_geometry {
  vec<real> ffactor[3];
  real sinth, costh, theta;
  template<typename T>
  __always_inline angle_geometry(T x) {
    vec<real> r01 = x[0] - x[1];
    vec<real> r21 = x[2] - x[1];
    
    vec<real> n = r01.cross(r21);
    real inv_r01norm = 1 / r01.norm();
    real inv_r21norm = 1 / r21.norm();
    real nnorm = n.norm();
    real inv_nnorm = 1 / nnorm;
    vec<real> r01hat = r01 * inv_r01norm;
    vec<real> r21hat = r21 * inv_r21norm;
    vec<real> nhat = n * inv_nnorm;
    if (nnorm == 0) nhat = 0;
    ffactor[0] = nhat.cross(r01hat) * inv_r01norm;
    ffactor[2] = r21hat.cross(nhat) * inv_r21norm;
    ffactor[1] = -ffactor[0] - ffactor[2];
    costh = r01hat.dot(r21hat);
    sinth = nnorm * inv_r01norm * inv_r21norm;
  }
  __always_inline const real rad() const{
    #ifndef __sw_slave__
    return acos(costh);
    #else
    return pacos(costh);
    #endif
  }
};
